1 Libraries and Data

1.1 Libraries

  library(phyloseq)
  library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
  library(decontam)
  library(ggplot2)
  library(vegan)
## Loading required package: permute
  library(ggordiplots)
## Loading required package: glue
  library(microbiome)
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2022 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:vegan':
## 
##     diversity
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
  library(pheatmap)
  library(cowplot)
  library(RColorBrewer)
  library(ggvegan)
  library(ggrepel)
  library(microbiomeMarker)
## Registered S3 method overwritten by 'gplots':
##   method         from     
##   reorder.factor DescTools
## 
## Attaching package: 'microbiomeMarker'
## The following objects are masked from 'package:microbiome':
## 
##     abundances, aggregate_taxa
## The following object is masked from 'package:phyloseq':
## 
##     plot_heatmap
  library(gllvm)
## Loading required package: TMB
## 
## Attaching package: 'TMB'
## The following object is masked from 'package:microbiomeMarker':
## 
##     normalize
## 
## Attaching package: 'gllvm'
## The following object is masked from 'package:vegan':
## 
##     ordiplot
## The following object is masked from 'package:stats':
## 
##     simulate
  library(microbiomeutilities)
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
## 
## Attaching package: 'microbiomeutilities'
## The following object is masked from 'package:microbiome':
## 
##     add_refseq
  library(MoMAColors)
## Registered S3 method overwritten by 'MoMAColors':
##   method        from     
##   print.palette DescTools

1.2 Global Plotting Options

Quick access options for output graphics to make legends and axis labels larger / more legible

#Global Plot Options
            plotopts<- theme(axis.text.y=element_text(size=20),axis.text.x=element_text(size=12),axis.title=element_text(size=20),strip.text=element_text(size=20),legend.title = element_text(size=20),legend.text = element_text(size=20)) 

#Smalller Legend Labs
            plotopts2<- theme(axis.text.y=element_text(size=20),axis.text.x=element_text(size=12),axis.title=element_text(size=20),strip.text=element_text(size=20),legend.title = element_text(size=15),legend.text = element_text(size=12)) 

## Global Site Colors
  sitecols<-c(brewer.pal(8,"Set2"),brewer.pal(9,"Paired")[9])


###### Stop Warnings turning into Errors
  options(warn=1)

1.3 Microbiome Data

  ps_woodchester<-readRDS('Woodchester Phyloseq Jun25 RID.Rdata')

2 Decontamination

2.1 Data Cleanup

############ DATA CLEANUP 
  
  #Prune Taxa With No Phylum Assignment 
    ps_prune<-prune_taxa(as.vector(!is.na(tax_table(ps_woodchester)[,2])),ps_woodchester)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
      ntaxa(ps_woodchester)-ntaxa(ps_prune) 
## [1] 91
  #Prune Chloroplasts
      ps_prune_nochloro<-prune_taxa(as.vector(tax_table(ps_prune)[,4]!="Chloroplast"),ps_prune)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
      ntaxa(ps_prune)-ntaxa(ps_prune_nochloro) 
## [1] 829
  #Remove Mitochondria    
      ps_prune_nochloro_nomito<-prune_taxa(as.vector(tax_table(ps_prune_nochloro)[,5]!="Mitochondria"),ps_prune_nochloro)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
  #Remove Archaea  
    ps_prune_nochloro_nomito_noarchaea<-prune_taxa(as.vector(tax_table(ps_prune_nochloro_nomito)[,1]!="Archea"),ps_prune_nochloro_nomito)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
     ntaxa(ps_woodchester) - ntaxa(ps_prune_nochloro_nomito_noarchaea)
## [1] 2604

2.2 Plot of Library Sizes

      #Inspect Library Sizes
            df <- as.data.frame(sample_data(ps_prune_nochloro_nomito_noarchaea)) # Put sample_data into a ggplot-friendly data.frame
            df$LibrarySize <- sample_sums(ps_prune_nochloro_nomito_noarchaea)
            df <- df[order(df$LibrarySize),]
            df$Index <- seq(nrow(df))
            df$sample_or_control<-ifelse(df$sampletype %in% c("negative","negative_ve"),"negative","sample")
           
            ggplot(data=df, aes(x=Index, y=LibrarySize,colour=sample_or_control)) + geom_point()

2.3 Identify Contaminants

        ##Identify Contaminants by Prevalence
            sample_data(ps_prune_nochloro_nomito_noarchaea)$is.neg <- sample_data(ps_prune_nochloro_nomito_noarchaea)$sampletype == "negative"
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
            contamdf.prev <- isContaminant(ps_prune_nochloro_nomito_noarchaea, method="prevalence", neg="is.neg",threshold=0.6,normalize=T)
            
          #How Many Contaminants?  
            table(contamdf.prev$contaminant) #722
## 
## FALSE  TRUE 
##  8182   722

2.4 Plot of Contaminant Frequency by Sample Type

Seems to be one ASV present in 3 negatives at high abundance - but decontam thinks this is contamination from true samples. Probably because it is not in the other 3 negatives

# Make phyloseq object of presence-absence in negative controls and true samples
  ps.pa <- transform_sample_counts(ps_prune_nochloro_nomito_noarchaea, function(abund) 1*(abund>0))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
  ps.pa.neg <- prune_samples(sample_data(ps.pa)$is.neg == TRUE, ps.pa)
  ps.pa.pos <- prune_samples(sample_data(ps.pa)$is.neg == FALSE, ps.pa)

# Make data.frame of prevalence in positive and negative samples
  df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
                      contaminant=contamdf.prev$contaminant)

#Plot
  ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
  xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")

2.4.1 Remove Contaminants

    ## Remove Contaminants
            ps_clean<-prune_taxa(!contamdf.prev$contaminant,ps_prune_nochloro_nomito_noarchaea)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
     ##Remove Mocks and Negatives
            table(sample_data(ps_clean)$sampletype)
## 
##   badger negative 
##      167        6
            ps_badger<-subset_samples(ps_clean,sampletype=="badger" & !Sample %in% c("A","B"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'

2.5 Filter Based on Prevalence and Abundance

####### Read Threshold and Sample Prevalence Threshold
  ps_clean_filter = filter_taxa(ps_badger, function(x) sum(x) > 10, TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
  #ps_clean_filter<-ps_badger

2.6 Assessing Post-QC Sample Coverage and Library Sizes

Now that we’ve done that, we can check what our post-QC library sizes are. It’s a good idea to report these in manuscripts and thesis chapters. Here was ask what the minimum, maximum and mean library sizes-per-samples are.

 ############## POST QC LIBRARY SIZES
      
      ###############
      # Post-QC Library Stats
      ###############
      
      mean(sample_sums(ps_clean_filter))
## [1] 56101.14
      range(sample_sums(ps_clean_filter))
## [1]  13569 267515

3 Sample Rarefaction

badger_matrix<-  t(as(otu_table(ps_clean_filter),"matrix"))
rarecurve(badger_matrix,step=50,cex=0.5)
## Warning in rarecurve(badger_matrix, step = 50, cex = 0.5): most observed count
## data have counts 1, but smallest count is 2

#Final Processing

3.1 Keep All Badgers

#Only Badgers  
  #ps_badger01<-prune_samples(sample_data(ps_clean_filter)$day0_diagnosis!="0.5",ps_clean_filter)
  ps_badger01<-ps_clean_filter

4 Sample Stats

#Badgers
  length(unique(sample_data(ps_clean_filter)$id))
## [1] 72
#Infection
    table(sample_data(ps_clean_filter)$day0_cdp)
## 
## 0.002162504 0.011800939 0.018212324 0.018355678 0.023783676 0.118358648 
##           1           1          48           1          61           1 
## 0.360305381 0.395608186 0.425203759 0.534689505 0.601465025 0.610032516 
##           1           4           1           2           9           1 
## 0.673103086 0.756314689 0.803003275 0.851805343  0.95890396 0.984162882 
##           1           1           1           1           1           1 
## 0.984493598 0.988353817 0.989785771 0.991890524 0.992220949  0.99554418 
##           1           1           1           1           1           2 
##  0.99653801 0.998535869 0.998934351 0.999090161 0.999281803 0.999386843 
##           2           1           1           1           1           1 
## 0.999743345 0.999822301  0.99983847 0.999925638 0.999983166 0.999999899 
##           1           1           1           1           1           1 
## 0.999999924 0.999999931 0.999999998 0.999999999           1 
##           1           1           1           1           4
#Known Age
    table(sample_data(ps_clean_filter)$age_fc) 
## 
## ADULT   CUB 
##    20   145
    length(unique(sample_data(ps_clean_filter)$id[sample_data(ps_clean_filter)$age_fc =="CUB"]))
## [1] 62

5 Alpha Diversity

5.1 Rarefy

##Extract Metadata
  meta5<-as(sample_data(ps_clean_filter),"data.frame")

#Minimum Read Depth   
  min(sample_sums(ps_clean_filter))
## [1] 13569
#Rarefy 
  ps_rare<-rarefy_even_depth(ps_clean_filter,rngseed = 10112022,sample.size = 13569)
## `set.seed(10112022)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(10112022); .Random.seed` for the full vector
## ...
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## 21OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Richness
  badger_rich<-estimate_richness(ps_rare,measures = c("Observed","Shannon"))
  badger_rich$Sample<- sample_data(ps_rare)$Sample
  badger_rich<-left_join(badger_rich,meta5,"Sample")
  
##Filter
  table(badger_rich$social_group,useNA="ifany")
## 
##         BEECH       BOXWOOD     COLE PARK COLLIERS WOOD     HONEYWELL 
##             2            18            13            11            26 
##        KENNEL         LARCH        NETTLE       OLD OAK     PARK MILL 
##            16             6             7            12             1 
## SCOTLAND BANK   SEPTIC TANK      TOP SETT          WEST  WINDSOR EDGE 
##             8             2             1             5            12 
##     WOOD FARM      WOODRUSH           YEW 
##            13             2            10
  badger_rich$year_social<-with(badger_rich,paste(capture_year,social_group,sep="_"))
  year_social_table<-table( badger_rich$year_social)
  
#Filter to Only Cases Where there are 3 or more data points
  year_social3<-names(year_social_table)[year_social_table>2]
  badger_rich_filter3<-subset(badger_rich,year_social %in% year_social3)

5.2 Richness Plots

5.2.1 Social Group

## All Badgers
  richplot1<-ggplot(badger_rich,aes(x=social_group,y=Observed)) + geom_point(fill=moma.colors("Levine2")[3],colour="white",shape=21,size=5)+ facet_grid(capture_year~.)
  richplot2<- richplot1 + stat_summary(fun.data = "mean_cl_boot", colour = "black", linewidth = 1,shape=23,fill="white") + theme_bw() + labs(x="Social Group",y="Observed Bacterial \n Richness") + guides(fill="none") + theme(axis.text.x = element_text(angle = 45, hjust=1)) + theme(strip.text.y = element_text(size=18),axis.text = element_text(size=12),axis.title=element_text(size=15))
  richplot2
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).

    ggsave2('Richness All Badgers All Years.pdf',richplot2,width=15,height=15,units="cm")
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
      ggsave2('Richness All Badgers All Years.tiff',richplot2,width=15,height=15,units="cm")
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
#Tabulate
    badger_social_tab<-with(badger_rich,table(social_group,capture_year))  
    #write.csv(badger_social_tab,'Badger Social Group By Year.csv')

5.2.2 Tb Status

tb_lab<-expression(paste(italic("M. bovis  "), "Infection Probability"))

#Sample size
  nrow(badger_rich)
## [1] 165
#All Badgers
  richplot_tb1<-ggplot(badger_rich,aes(x=day0_cdp,y=Observed)) + geom_smooth(method="lm") + geom_point(size=5,shape=21,colour="white",fill=moma.colors("Levine2")[3]) + facet_grid(.~capture_year) + theme_bw(base_size = 15)
 richplot_tb2<- richplot_tb1 + labs(x=tb_lab,y="Observed Bacterial \n Richness") 
 richplot_tb2
## `geom_smooth()` using formula = 'y ~ x'

  ggsave2('All Badgers Continuous Tb Status by Year.pdf',richplot_tb2,width=22,height=10,units="cm")
## `geom_smooth()` using formula = 'y ~ x'

5.2.3 Tb Within Social Group

##Filter To At Least 5 Per Group 
  rich_filter_tb_tab<-with(badger_rich,table(social_group))
  rich_filter_tb_tab_filtergroups<-rownames(rich_filter_tb_tab)[rich_filter_tb_tab>4]
  rich_filter_tb_filter<-subset(badger_rich,social_group %in% rich_filter_tb_tab_filtergroups)

#All Badgers
  richplot_tb_soc1<-ggplot(rich_filter_tb_filter,aes(x=day0_cdp,y=Observed)) + geom_smooth(method="lm") + geom_point(size=5,shape=21,colour="white",fill=moma.colors("Levine2")[3]) + facet_wrap(.~social_group) + theme_bw(base_size = 15) + labs(x=tb_lab,y="Observed Bacterial \n Richness")
  richplot_tb_soc1
## `geom_smooth()` using formula = 'y ~ x'

  ggsave2('All Badgers Tb Status by Social Group.pdf',richplot_tb_soc1,width=30,height=22,units="cm")
## `geom_smooth()` using formula = 'y ~ x'

5.2.4 Tb Combined Plot

tb_richness_plot1<-plot_grid(richplot_tb_soc1,richplot_tb2,nrow=2,labels="AUTO",label_size = 22,rel_heights = c(2,1))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
tb_richness_plot1

ggsave2('All Badgers Richness Continuous Tb Combined Plot.pdf',tb_richness_plot1,width=20,height=22,units="cm")
ggsave2('Richness Tb Combined Plot.tiff',tb_richness_plot1,width=20,height=22,units="cm")

5.2.5 Mass & Condition Score

## Relevel COndition
    badger_rich$condition<-factor(badger_rich$condition,levels=c("POOR","FAIR","GOOD","VERY GOOD"))
  
            #Richness Plot  by MASS 
              mass_plot1<-badger_rich %>% filter(!is.na(weight)) %>% ggplot(.,aes(x=weight,y=Observed))  + geom_smooth(method="lm")  + geom_point(shape=21,aes(fill=condition),size=5,color="white",alpha=0.7) + theme_bw()
              mass_plot2 <- mass_plot1+ theme(axis.text=element_text(size=20),axis.title = element_text(size=20),strip.text.x=element_text(size=20)) + labs(y="Observed Bacterial \n Richness",x="Mass (kg)")
              mass_plot3<-mass_plot2   + facet_wrap(capture_year~.)  + labs(fill="Condition") + scale_fill_brewer(palette = "Paired")
              mass_plot3
## `geom_smooth()` using formula = 'y ~ x'

              ggsave2('Mass by Year.pdf',mass_plot3,width=25,height=10,units="cm")
## `geom_smooth()` using formula = 'y ~ x'

5.3 Richness Models

5.3.1 Data Prep

library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:microbiomeMarker':
## 
##     nsamples
## The following object is masked from 'package:phyloseq':
## 
##     nsamples
## The following object is masked from 'package:stats':
## 
##     ar
## Sex Variable
  #badger_rich$sex<-ifelse(is.na(badger_rich$testes),"F","M")
  with(badger_rich,table(reproc,testes,exclude = NULL))
##                      testes
## reproc                ASCENDED DESCENDED MID <NA>
##   LACTATING                  0         0   0    9
##   LACTATING SOMETIME         0         0   0    7
##   LACTATING THIS YEAR        0         0   0    7
##   NEVER LACTATING            0         0   0   47
##   PREGNANT                   0         0   0    1
##   UNSURE                     0         0   0    3
##   <NA>                       5        65  17    4
  subset(badger_rich,is.na(reproc) & is.na(testes))
##     Observed  Shannon   Sample sampletype    location     date.x    tattoo_date
## 30        35 1.608767  13-2018     badger Woodchester 09/08/2016 39N 09/08/2016
## 71        51 2.378778 172-2018     badger Woodchester 09/08/2016 28A 09/08/2016
## 96        61 2.740949  29-2018     badger Woodchester 09/08/2016 39N 09/08/2016
## 107       84 2.936276   4-2018     badger Woodchester 10/08/2016  6C 10/08/2016
##      moc social_group   sett capture_year where weight toothwear condition
## 30  TRAP       NETTLE NETTLE         2016  SETT    6.7      0.75      POOR
## 71  TRAP WINDSOR EDGE HOPPER         2016  SETT    8.8      0.50      GOOD
## 96  TRAP       NETTLE NETTLE         2016  SETT    6.7      0.75      POOR
## 107 TRAP    HONEYWELL   BEND         2016  SETT    4.0      0.00      FAIR
##     reproc testes sampledate_julian highest_cdp highest_cdp_time   day0_cdp
## 30    <NA>   <NA>     17021.96 days  0.99653801                0 0.99653801
## 71    <NA>   <NA>     17021.96 days  0.02378368                0 0.02378368
## 96    <NA>   <NA>     17021.96 days  0.99653801                0 0.99653801
## 107   <NA>   <NA>     17022.96 days  0.02378368              -43 0.02378368
##           date pm age_fc year_fc known_age   id is.neg       year_social
## 30  09/08/2016 No    CUB    2009         7 ID22  FALSE       2016_NETTLE
## 71  09/08/2016 No    CUB    2014         2 ID32  FALSE 2016_WINDSOR EDGE
## 96  09/08/2016 No    CUB    2009         7 ID22  FALSE       2016_NETTLE
## 107 28/06/2016 No    CUB    2016         1 ID27  FALSE    2016_HONEYWELL
##Filter Data to No Missing Values for Any Predictor   
  badger_rich_complete<-badger_rich[with(badger_rich,complete.cases(weight,capture_year,toothwear,social_group,day0_cdp)),]
  

##Summary Stats  
  nrow(badger_rich_complete) # 165 samples
## [1] 165
  length(unique(badger_rich_complete$social_group)) #18 social groups (18 in BRMS model)
## [1] 18
  length(unique(badger_rich_complete$capture_year)) #3 capture years
## [1] 3
  length(unique(badger_rich_complete$id)) #72 badgers
## [1] 72
##Factor for Cap Year  
  badger_rich_complete$capture_year<-factor(badger_rich_complete$capture_year)

5.3.2 Plots

## Check Correlation Between Age and Mass
    ggplot(badger_rich_complete,aes(x=known_age,y=weight)) + geom_smooth(method="lm",formula=y~x+I(x^2)) + geom_point(shape=21,size=5,fill="white")
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).

    with(badger_rich_complete,cor.test(known_age,weight))
## 
##  Pearson's product-moment correlation
## 
## data:  known_age and weight
## t = 1.6465, df = 143, p-value = 0.1019
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02721656  0.29289479
## sample estimates:
##       cor 
## 0.1363978
  ### All Data Plot  
   ggplot(badger_rich_complete,aes(x=known_age,y=Observed)) + geom_smooth(method="lm") + geom_point(shape=21,size=5,fill="white") + scale_x_continuous(n.breaks=11) + theme_bw() + plotopts + labs(y="Observed Richness",x="Age")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).

    with(badger_rich_complete,cor.test(known_age,Observed))
## 
##  Pearson's product-moment correlation
## 
## data:  known_age and Observed
## t = -2.3428, df = 143, p-value = 0.02052
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.34447089 -0.03019432
## sample estimates:
##        cor 
## -0.1922573
  ### Subset to Repeat Sampled Badgers
    badger_rich_tab<-table(badger_rich_complete$id,badger_rich_complete$known_age)
    badger_rich_tab2<-apply(badger_rich_tab,1,function(x)sum(x>0))
    badger_rich_complete_min3<-subset(badger_rich_complete,id %in% names(badger_rich_tab2)[badger_rich_tab2>2])
    table(badger_rich_complete_min3$id)
## 
## ID11 ID12 ID13  ID2 ID23  ID8 
##    4    3    6    8    4    7
#### PLOT    
   ggplot(badger_rich_complete_min3,aes(x=known_age,y=Observed,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Observed Richness",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none")
## `geom_smooth()` using formula = 'y ~ x'

5.3.3 Models

#Fix Capture Year 
      badger_rich_complete$capture_year<-factor(badger_rich_complete$capture_year)
      badger_rich_complete$toothwear<-as.factor(badger_rich_complete$toothwear)

#Sex
    subset(badger_rich_complete,is.na(reproc) & is.na(testes))  
##     Observed  Shannon   Sample sampletype    location     date.x    tattoo_date
## 30        35 1.608767  13-2018     badger Woodchester 09/08/2016 39N 09/08/2016
## 71        51 2.378778 172-2018     badger Woodchester 09/08/2016 28A 09/08/2016
## 96        61 2.740949  29-2018     badger Woodchester 09/08/2016 39N 09/08/2016
## 107       84 2.936276   4-2018     badger Woodchester 10/08/2016  6C 10/08/2016
##      moc social_group   sett capture_year where weight toothwear condition
## 30  TRAP       NETTLE NETTLE         2016  SETT    6.7      0.75      POOR
## 71  TRAP WINDSOR EDGE HOPPER         2016  SETT    8.8       0.5      GOOD
## 96  TRAP       NETTLE NETTLE         2016  SETT    6.7      0.75      POOR
## 107 TRAP    HONEYWELL   BEND         2016  SETT    4.0         0      FAIR
##     reproc testes sampledate_julian highest_cdp highest_cdp_time   day0_cdp
## 30    <NA>   <NA>     17021.96 days  0.99653801                0 0.99653801
## 71    <NA>   <NA>     17021.96 days  0.02378368                0 0.02378368
## 96    <NA>   <NA>     17021.96 days  0.99653801                0 0.99653801
## 107   <NA>   <NA>     17022.96 days  0.02378368              -43 0.02378368
##           date pm age_fc year_fc known_age   id is.neg       year_social
## 30  09/08/2016 No    CUB    2009         7 ID22  FALSE       2016_NETTLE
## 71  09/08/2016 No    CUB    2014         2 ID32  FALSE 2016_WINDSOR EDGE
## 96  09/08/2016 No    CUB    2009         7 ID22  FALSE       2016_NETTLE
## 107 28/06/2016 No    CUB    2016         1 ID27  FALSE    2016_HONEYWELL
    badger_rich_complete$sex<-ifelse(is.na(badger_rich_complete$testes),"F","M")
    badger_rich_complete$sex[badger_rich_complete$id=="ID27"]<-"M"
    badger_rich_complete$sex[badger_rich_complete$id=="ID32"]<-"F"
    
    
## Subset to Known Age
    badger_rich_complete_age<-subset(badger_rich_complete,!is.na(known_age))
    
#Sample Size
    nrow(badger_rich_complete_age)
## [1] 145
    length(unique(badger_rich_complete_age$id))
## [1] 62
    length(unique(badger_rich_complete_age$social_group))
## [1] 18

5.4 Bivariate Model

library(brms)

  
    #Model For Each Outcome 
        bf_rich<-bf(Observed ~ capture_year +  known_age + I(known_age^2) + sex + day0_cdp +  (1|p|social_group) + (1|q|id)) + negbinomial()
        bf_mass<-bf(weight ~ capture_year  + known_age + I(known_age^2)  + sex + day0_cdp + (1|p|social_group) + (1|q|id)) + gaussian()

  fit1 <- brm(
  bf_rich + bf_mass + set_rescor(FALSE),
  data = badger_rich_complete_age, chains = 4, cores = 4,
  control = list(adapt_delta = 0.95),save_pars = save_pars(all = TRUE),
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
  summary(fit1)
##  Family: MV(negbinomial, gaussian) 
##   Links: mu = log; shape = identity
##          mu = identity; sigma = identity 
## Formula: Observed ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1 | p | social_group) + (1 | q | id) 
##          weight ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1 | p | social_group) + (1 | q | id) 
##    Data: badger_rich_complete_age (Number of observations: 145) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Multilevel Hyperparameters:
## ~id (Number of levels: 62) 
##                                          Estimate Est.Error l-95% CI u-95% CI
## sd(Observed_Intercept)                       0.13      0.05     0.02     0.22
## sd(weight_Intercept)                         0.83      0.26     0.25     1.31
## cor(Observed_Intercept,weight_Intercept)     0.23      0.38    -0.54     0.92
##                                          Rhat Bulk_ESS Tail_ESS
## sd(Observed_Intercept)                   1.00      872      836
## sd(weight_Intercept)                     1.00      853      791
## cor(Observed_Intercept,weight_Intercept) 1.00      996     1103
## 
## ~social_group (Number of levels: 18) 
##                                          Estimate Est.Error l-95% CI u-95% CI
## sd(Observed_Intercept)                       0.04      0.04     0.00     0.13
## sd(weight_Intercept)                         0.54      0.33     0.03     1.25
## cor(Observed_Intercept,weight_Intercept)    -0.06      0.58    -0.96     0.93
##                                          Rhat Bulk_ESS Tail_ESS
## sd(Observed_Intercept)                   1.00     1934     2246
## sd(weight_Intercept)                     1.01      900     1408
## cor(Observed_Intercept,weight_Intercept) 1.00     1111     2213
## 
## Regression Coefficients:
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Observed_Intercept            4.19      0.09     4.00     4.37 1.00     3917
## weight_Intercept              6.29      0.57     5.22     7.42 1.00     3177
## Observed_capture_year2017     0.04      0.06    -0.08     0.15 1.00     5004
## Observed_capture_year2018    -0.09      0.09    -0.27     0.08 1.00     4161
## Observed_known_age           -0.09      0.05    -0.19     0.01 1.00     3519
## Observed_Iknown_ageE2         0.01      0.01    -0.00     0.02 1.00     3779
## Observed_sexM                -0.04      0.07    -0.17     0.09 1.00     5120
## Observed_day0_cdp             0.06      0.08    -0.10     0.22 1.00     5222
## weight_capture_year2017       0.62      0.33    -0.02     1.26 1.00     4107
## weight_capture_year2018       0.25      0.49    -0.75     1.21 1.00     3727
## weight_known_age              0.66      0.30     0.07     1.24 1.00     3013
## weight_Iknown_ageE2          -0.08      0.03    -0.14    -0.02 1.00     3456
## weight_sexM                   1.55      0.37     0.81     2.30 1.00     3901
## weight_day0_cdp               0.33      0.48    -0.62     1.25 1.00     5347
##                           Tail_ESS
## Observed_Intercept            3026
## weight_Intercept              2989
## Observed_capture_year2017     3256
## Observed_capture_year2018     3276
## Observed_known_age            3064
## Observed_Iknown_ageE2         2846
## Observed_sexM                 3630
## Observed_day0_cdp             3451
## weight_capture_year2017       2656
## weight_capture_year2018       3084
## weight_known_age              2199
## weight_Iknown_ageE2           2332
## weight_sexM                   2639
## weight_day0_cdp               3363
## 
## Further Distributional Parameters:
##                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape_Observed    16.47      3.29    10.82    23.72 1.00     1634     2433
## sigma_weight       1.51      0.12     1.29     1.76 1.00     1807     2483
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
  conditional_effects(fit1)

  bayes_R2(fit1)
##             Estimate  Est.Error      Q2.5     Q97.5
## R2Observed 0.2714016 0.08475793 0.1116479 0.4307328
## R2weight   0.4484407 0.06753946 0.3046351 0.5657795
  pp_check(fit1,resp='Observed')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

  pp_check(fit1,resp='weight')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

6 Beta Diversity

6.1 Stacked Barplot

#What Are the Names of the most abundant phyla?  
  physeq_phylumcollapse<- ps_rare %>% microbiome::aggregate_taxa(level="Phylum")
  physeq_top5phyla = names(sort(taxa_sums(physeq_phylumcollapse), TRUE)[1:5])
  physeq_top5phyla
## [1] "Firmicutes"       "Proteobacteria"   "Fusobacteriota"   "Bacteroidota"    
## [5] "Campylobacterota"
#Subset the phyloseq object to those phyla   
  physeq_top5phylum_filter<-subset_taxa(ps_rare,Phylum %in% physeq_top5phyla)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Sample Size
  phyloseq::nsamples(physeq_top5phylum_filter)
## [1] 165

6.1.1 Sample Grouping

#Remake Our Graph  but with no grouping (samples)
physeq_top5phylum_samples_plot <- physeq_top5phylum_filter %>%
  microbiome::aggregate_taxa(level = "Phylum") %>%  
  microbiome::transform(transform = "compositional") %>%
  plot_composition(sample.sort = "Firmicutes")
physeq_top5phylum_samples_plot  

6.1.2 Year Grouping

#Remake Our Graph  but with averaging by YEAR
  physeq_top5phylum_year_plot <- physeq_top5phylum_filter %>%
    microbiome::aggregate_taxa(level = "Phylum") %>%  
    microbiome::transform(transform = "compositional") %>%
    plot_composition(sample.sort = "Firmicutes", average_by = "capture_year") + scale_fill_manual(values = moma.colors("Levine2", direction=-1)) + theme_bw() + labs(fill="Bacterial Phylum",x="Capture Year") + plotopts2 + theme(axis.text.x = element_text(angle=45,hjust=1, size = 14), axis.text.y = element_text(size = 14)) + guides(fill="none")
  physeq_top5phylum_year_plot

    ggsave2('Badger Barplot by Year.pdf',physeq_top5phylum_year_plot,width=10,height=15,units="cm")
    ggsave2('Badger Barplot by Year.tiff',physeq_top5phylum_year_plot,width=10,height=15,units="cm")

6.1.3 Social Group Grouping

    #Remake Our Graph  but with averaging by Social Group
  physeq_top5phylum_social_plot <- physeq_top5phylum_filter %>%
    microbiome::aggregate_taxa(level = "Phylum") %>%  
    microbiome::transform(transform = "compositional") %>%
    plot_composition(sample.sort = "Firmicutes", average_by = "social_group") + labs(fill="Bacterial Phylum",x="Social Group")+ scale_fill_manual(values = moma.colors("Levine2", direction = -1)) + theme_bw()  + plotopts + theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1, size = 12), axis.text.y = element_text(size = 14), legend.text = element_text(size = 14)) 
  physeq_top5phylum_social_plot

        ggsave2('Badger Barplot by SocialGroup.pdf',physeq_top5phylum_social_plot,width=15,height=10,units="cm")
        ggsave2('Badger Barplot by SocialGroup.tiff',physeq_top5phylum_social_plot,width=20,height=15,units="cm")

6.1.4 Combined Plot

#Combined plot
abundance_plots<-plot_grid(physeq_top5phylum_social_plot,physeq_top5phylum_year_plot,nrow=1,labels=c("A", "B"), label_size = 25)
abundance_plots

ggsave("Combined_Badger_Barplots.png", abundance_plots, width = 30, height = 20, units = "cm")
ggsave("Combined_Badger_Barplots.pdf", abundance_plots, width = 30, height = 20, units = "cm")

6.2 Some Functions

#Sample for Stripping out the ASV matrix from a phyloseq object (run all of this)
  vegan_otu <- function(physeq) {
    OTU <- otu_table(physeq)
    if (taxa_are_rows(OTU)) {
      OTU <- t(OTU)
    }
    return(as(OTU, "matrix"))
  }

6.3 CLR Transformation

#Extract Matrix and Sample Data - Same Dataset for Richness
  ps_badger_forclr<-prune_samples(sample_data(ps_badger01)$Sample %in% badger_rich_complete_age$Sample,ps_badger01)

  ps_clr<-microbiome::transform(ps_badger_forclr,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
  woodchester_clr_v<-vegan_otu(ps_clr)
  woodchester_clr_s<-as(sample_data(ps_clr),"data.frame")
  woodchester_clr_s$sex<-badger_rich_complete_age$sex[match(woodchester_clr_s$Sample,badger_rich_complete_age$Sample)]

6.4 PERMANOVA

### Sample Sizes for PERMANOVA
  nrow(woodchester_clr_s)
## [1] 145
  length(unique(woodchester_clr_s$social_group))
## [1] 18
  length(unique(woodchester_clr_s$id))
## [1] 62
  table(woodchester_clr_s$capture_year)
## 
## 2016 2017 2018 
##   49   70   26
    mean(complete.cases(woodchester_clr_s[,c("known_age","day0_cdp","social_group","sex","capture_year")])) #all complete data
## [1] 1
woodchester_clr_s$social_group<-factor(woodchester_clr_s$social_group)

#Fit Model 
  badger_clr_perm<-adonis2(woodchester_clr_v ~  social_group + known_age +sex + day0_cdp + factor(capture_year) + factor(id),data=woodchester_clr_s,method="euclidean",by="term")
    badger_clr_perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = woodchester_clr_v ~ social_group + known_age + sex + day0_cdp + factor(capture_year) + factor(id), data = woodchester_clr_s, method = "euclidean", by = "term")
##                       Df SumOfSqs      R2      F Pr(>F)    
## social_group          17    29952 0.16459 1.5707  0.001 ***
## known_age              1     2244 0.01233 2.0001  0.001 ***
## sex                    1     2102 0.01155 1.8739  0.003 ** 
## day0_cdp               1     1045 0.00574 0.9318  0.583    
## factor(capture_year)   2     4369 0.02401 1.9473  0.001 ***
## factor(id)            48    59265 0.32566 1.1007  0.032 *  
## Residual              74    83009 0.45613                  
## Total                144   181987 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.5 CLR Ordination

############# CLR Transform Ordination 
  
  library(ggordiplots)
  
  ##PCA In Vegan 
    grouptab<-table(woodchester_clr_s$social_group)
    group5<-names(grouptab)[grouptab>4]
    ps_badger_orddata<-prune_samples(sample_data(ps_badger_forclr)$social_group %in% group5,ps_badger_forclr) #122
    ps_orddata_clr<-microbiome::transform(ps_badger_orddata,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
  woodchester_group5_clr_v<-vegan_otu(ps_orddata_clr)
  woodchester_group5_clr_s<-as(sample_data(ps_orddata_clr),"data.frame")

### Sample Sizes for ORDINATION
  nrow(woodchester_group5_clr_s)
## [1] 131
  length(unique(woodchester_group5_clr_s$social_group))
## [1] 11
  length(unique(woodchester_group5_clr_s$id))
## [1] 54
  table(woodchester_group5_clr_s$capture_year)
## 
## 2016 2017 2018 
##   43   66   22
    #Run PCA 
        woodchester_group5_clr_pca<-rda(woodchester_group5_clr_v)
    
  #Inspect Plot 
    gg_ordiplot(woodchester_group5_clr_pca,groups=woodchester_group5_clr_s$social_group,spiders = TRUE)

  #Extract for GGPLot2
      social_ggord_data<-gg_ordiplot(woodchester_group5_clr_pca,groups=woodchester_group5_clr_s$social_group,spiders = TRUE,plot=FALSE) 
      social_ggord_data2<-social_ggord_data$df_ord
      social_ggord_data2$capture_year<-woodchester_group5_clr_s$capture_year
      
    #Add Social Group to DF SPIDERS  
      social_ggord_data2$df_spiders$Sample<-rownames(social_ggord_data2$df_spiders)
        social_ggord_data2$df_spiders$capture_year<-woodchester_group5_clr_s$capture_year[match(social_ggord_data2$df_spiders$Sample,woodchester_group5_clr_s$Sample)]
      
  #Plot   
    badger_social_clr_plot1<-ggplot() + geom_segment(data=social_ggord_data$df_spiders, aes(x=cntr.x, xend=x, y=cntr.y, yend=y, color=Group), show.legend = FALSE) 
  #Plot Points  
    badger_social_clr_plot2<- badger_social_clr_plot1 + geom_point(data=social_ggord_data2,aes(x=x,y=y,fill=Group),shape=21,color="white",size=5,alpha=0.8) + theme_bw() 
 #Colour
    badger_social_clr_plot3<- badger_social_clr_plot2 + labs(x="PC1 (9.3%)",y="PC2 (8.5%)",fill="Site") + plotopts + theme(legend.text = element_text(size=12), axis.text.y = element_text(size = 14), axis.text.x = element_text(size = 14)) + labs(fill="Social Group") + scale_fill_manual(values = moma.colors("Warhol")) + scale_color_manual(values = moma.colors("Warhol"))
#Add Ellipses
    badger_social1617_clr_plot4<- badger_social_clr_plot3 + geom_path(data = social_ggord_data$df_ellipse, aes(x = x, y = y, color = Group), show.legend = FALSE)
badger_social1617_clr_plot4

    ggsave2('CLR Beta Diversity Group5.pdf',badger_social1617_clr_plot4,width=25,height=10,units="cm")
    ggsave2('CLR Beta Diversity Group5.png',badger_social1617_clr_plot4,width=25,height=15,units="cm")

7 Individual Dynamics

7.1 Extract Values

   ### All Badger CLR 
    ps_all_clr<-microbiome::transform(ps_clean_filter,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
    ps_all_clr_v<-vegan_otu(ps_all_clr)
    ps_all_clr_s<-as(sample_data(ps_all_clr),"data.frame")
    ps_all_ord<-rda(ps_all_clr_v)
    ps_all_scores<-scores(ps_all_ord)$sites
    ps_all_scores2<-cbind(ps_all_scores,ps_all_clr_s)
    
      ps_all_scores2$Sample<-rownames(ps_all_scores2)
  ps_all_scores2$observed_richness<-badger_rich$Observed[match(ps_all_scores2$Sample,badger_rich$Sample)]
#Subset
  ps_all_scores3<-subset(ps_all_scores2,!is.na(known_age))
#Sample Size  
  nrow(ps_all_scores3)
## [1] 145
  length(unique(ps_all_scores3$id))
## [1] 62

7.2 Age PC1 Individual Level

  #### Dynamics with Age for Repeat Sampled Badger
    badger_beta_tab<-table(ps_all_scores2$id,ps_all_scores2$known_age)
    badger_beta_tab2<-apply(badger_beta_tab,1,function(x)sum(x>0))
    badger_beta_complete_min3<-subset(ps_all_scores2,id %in% names(badger_beta_tab2)[badger_beta_tab2>2])
    table(badger_beta_complete_min3$id)
## 
## ID11 ID12 ID13  ID2 ID23  ID8 
##    4    3    6    8    4    7
#### PLOT    
   pc1_age_plot1<-ggplot(badger_beta_complete_min3,aes(x=known_age,y=PC1,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Beta Diversity (PC1)",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none") + scale_fill_manual(values=MoMAColors::moma.colors("Levine2",6)) + theme(panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.text.y = element_text(size = 14))
   pc1_age_plot1
## `geom_smooth()` using formula = 'y ~ x'

7.2.1 Age Richness Individual Level

 #### Dynamics with Age for Repeat Sampled Badger
    badger_rich_tab<-table(badger_rich$id,badger_rich$known_age)
    badger_rich_tab2<-apply(badger_rich_tab,1,function(x)sum(x>0))
    badger_rich_complete_min3<-subset(badger_rich,id %in% names(badger_rich_tab2)[badger_rich_tab2>2])
    table(badger_rich_complete_min3$id)
## 
## ID11 ID12 ID13  ID2 ID23  ID8 
##    4    3    6    8    4    7
#### PLOT    
   rich_age_plot1<-ggplot(badger_rich_complete_min3,aes(x=known_age,y=Observed,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Alpha Diversity (Richness)",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none") + scale_fill_manual(values=MoMAColors::moma.colors("Levine2",6))+ theme(panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.text.y = element_text(size = 14))
   rich_age_plot1
## `geom_smooth()` using formula = 'y ~ x'

7.2.2 Combined Age Dynamics Plot

#age_rich3,temporal_age_plot2,
#age_grid1<-plot_grid(rich_age_plot1,pc1_age_plot1,nrow=1,labels="AUTO",label_size = 25)
#ggsave2('Age Dynamics Plot.pdf',age_grid1,width=28,height=12,units="cm")
#ggsave2('Age Dynamics Plot.png',age_grid1,width=28,height=12,units="cm")

7.2.3 Stacked Barplot By Age

#Known Age
  ps_age<-prune_samples(sample_data(ps_rare)$id %in% ps_all_scores3$id & !is.na(sample_data(ps_rare)$known_age),ps_rare)

#Filter to Top 5 Phyla
  ps_age_top5phylum_filter<-subset_taxa(ps_age,Phylum %in% physeq_top5phyla)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Plot Data
  ps_age_top5phylum_data <- ps_age_top5phylum_filter %>%
    microbiome::aggregate_taxa(level = "Phylum") %>%  
    microbiome::transform(transform = "compositional") %>% plot_composition(sample.sort = "known_age") 
  
#Copy Across ID  
  ps_age_top5phylum_data2<-ps_age_top5phylum_data$data
  ps_age_top5phylum_data2$id<-sample_data(ps_age_top5phylum_filter)$id[match(ps_age_top5phylum_data2$Sample,sample_data(ps_age_top5phylum_filter)$Sample)]
  
   ps_age_top5phylum_data2$age<-sample_data(ps_age_top5phylum_filter)$known_age[match(ps_age_top5phylum_data2$Sample,sample_data(ps_age_top5phylum_filter)$Sample)]
   
#Plot
  ps_age_top5phylum_plot <- ggplot(ps_age_top5phylum_data2, aes(x = xlabel, y = Abundance, fill = Tax)) + scale_x_discrete(breaks = ps_age_top5phylum_data2$xlabel,labels=as.character(ps_age_top5phylum_data2$age)) + geom_bar(position = "stack", stat = "identity") + facet_wrap(~id, scales = "free") + theme_bw() + scale_fill_manual(values = moma.colors("Levine2", direction = -1)) + labs(fill="Bacterial Phylum",x="Age") 
  ps_age_top5phylum_plot

        ggsave2('Badger Barplot by Age.pdf',ps_age_top5phylum_plot,width=30,height=30,units="cm")
        ggsave2('Badger Barplot by Age.tiff',ps_age_top5phylum_plot,width=30,height=30,units="cm")

7.3 PC1 Over Time (Date) by Infection Probability

Uses A different dataset filtered to known infection badgers

  #Subset To Individualls with Multiuple entires 
    woodchester_temp_table<-table(ps_all_scores2$id)
    woodchester_scores_sub<-subset(ps_all_scores2,id %in% names(woodchester_temp_table)[woodchester_temp_table>2])
   
          temporal_plot1<-ggplot(woodchester_scores_sub,aes(x=sampledate_julian,y=PC1)) + geom_smooth(method="lm",se=F)+ geom_point(size=5,shape=21,aes(fill=day0_cdp),color="gray77")   
          temporal_plot2<- temporal_plot1+ facet_wrap(.~id) + labs(x= "Julian Day",fill="M. bovis Infection Probability") + theme_bw() + theme(axis.text=element_text(size=15),strip.text.x=element_text(size=15),axis.title=element_text(size=15),legend.position = "top",axis.text.x=element_text(angle=90,hjust=1)) 
          temporal_plot3<- temporal_plot2 + scale_fill_moma_c("Exter")
          temporal_plot3
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'

              ggsave2('Infection PC1 Temporal Plot.pdf',temporal_plot3,height=18,width=20,units="cm")
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'
              ggsave2('Infection PC1 Temporal Plot.tiff',temporal_plot3,height=18,width=20,units="cm")
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'

8 General Linear Latent Variable Models

## OTUs Top 50
  badger_genus<-aggregate_top_taxa2(ps_badger_forclr, "Genus", top = 50)
  clr_scaled <-microbiome::transform(badger_genus, transform = "clr")

#Extract
  ys <- data.frame(t(otu_table(clr_scaled)))
  names(ys) <-taxa_names(clr_scaled)
  
#Predictors
  Xs<-data.frame(sample_data(clr_scaled)) %>% select(id,day0_cdp,known_age,social_group,capture_year)
  Xs$sex<-badger_rich_complete_age$sex[match(rownames(Xs),badger_rich_complete_age$Sample)]
  Xs$capture_year<-factor(Xs$capture_year)

#Define Study Design  
  sDesign<-data.frame(id = Xs$id)

## Model 
  fit_reduced_scaled <- gllvm(ys, Xs, 
                              num.lv = 2,
                              formula = ~ capture_year + day0_cdp + known_age + sex, 
                              family = "gaussian",
                              row.eff = ~(1|id),starting.val='random', studyDesign = sDesign)
## Warning in gllvm(ys, Xs, num.lv = 2, formula = ~capture_year + day0_cdp + : There are rows full of zeros in y.
    coefplot(fit_reduced_scaled)

## Model 
  fit_reduced_scaled2 <- gllvm(ys, Xs, 
                              num.lv = 2,
                              formula = ~ capture_year + sex * day0_cdp + known_age, 
                              family = "gaussian",
                              row.eff = ~(1|id),starting.val='random', studyDesign = sDesign)
## Warning in gllvm(ys, Xs, num.lv = 2, formula = ~capture_year + sex * day0_cdp + : There are rows full of zeros in y.
#Estimates 
  df<-coef(fit_reduced_scaled2)
  est_df<-data.frame(df$Intercept)
  est_df2<-data.frame(df$Xcoef) 
  est_df3<-merge(est_df, est_df2, by = 0)
  
#Order genera
  row.names(est_df3)<-est_df3$Row.names
  est_df3<-est_df3[colnames(ys),]
  names(est_df3)[1]<- "Genus"
  names(est_df3)[2]<- "Intercept"
  

### COnfidence Intervals
  confint_df<-data.frame(confint(fit_reduced_scaled2))
  
#Identify Rows with Main Effects
   one_comma<-sapply(rownames(confint_df),function(x) length(gregexpr(":", x, fixed = TRUE)[[1]]))==1
   
###Strip Out Individual Datasets
  
  ##CDP
    cdp_main<-grepl("^Xcoef.day0_cdp",rownames(confint_df))
    int_data_day0cdp<-cbind(est_df3[,c("Genus","day0_cdp")],confint_df[cdp_main+one_comma==2,])
  ##Sex
    sex_main<-grepl("^Xcoef.sexM",rownames(confint_df))
    int_data_sexM<-cbind(est_df3[,c("Genus","sexM")],confint_df[sex_main+one_comma==2,])
  ##Age
    age_main<-grepl("^Xcoef.known_age",rownames(confint_df))
    int_data_age<-cbind(est_df3[,c("Genus","known_age")],confint_df[age_main+one_comma==2,])
  #  ##Tb Sex Interaction
  int_data_sex_day0<-cbind(est_df3[,c("Genus","sexM.day0_cdp")],confint_df[grep("Xcoef.sexM:day0_cdp",rownames(confint_df)),]) 
    # ##Age: Tb Interaction
  # int_data_age_tb<-cbind(est_df3[,c("Genus","day0_cdp.known_age")],confint_df[grep("Xcoef.day0_cdp:known_age",rownames(confint_df)),])
  # 
 
  
#Extra Variables and Rename Columns 
  colnames(int_data_day0cdp)<-c("Genus","Estimate","l95","u95")
  colnames(int_data_sexM)<-c("Genus","Estimate","l95","u95")
  colnames(int_data_age)<-c("Genus","Estimate","l95","u95")
  # colnames(int_data_age_tb)<-c("Genus","Estimate","l95","u95")
  colnames(int_data_sex_day0)<-c("Genus","Estimate","l95","u95")

  int_data_day0cdp$trait<-"Infection Probability"
  int_data_sexM$trait<-"sex Male"
  int_data_age$trait<-"Age"
  # int_data_age_tb$trait<-"Age:Infection"
  int_data_sex_day0$trait<-"Male:Infection"
    
  tb_mod_plotdata<-rbind(int_data_day0cdp,int_data_sexM,int_data_age,int_data_sex_day0)

#Order   
  tb_mod_plotdata$trait<-factor(tb_mod_plotdata$trait,levels=c("sex Male","Age","Infection Probability","Male:Infection"))
  tb_mod_plotdata2<- tb_mod_plotdata %>% group_by(trait)  %>% arrange(Estimate,.by_group=T)
  tb_mod_plotdata2$Genus<-factor(tb_mod_plotdata2$Genus,levels=unique(tb_mod_plotdata2$Genus))
#Significance  
  tb_mod_plotdata2$Sig<- !data.table::between(0, tb_mod_plotdata2$l95, tb_mod_plotdata2$u95)

   sig_col<-moma.colors("Levine2")[6]

##Subset
   tb_mod_plotdata_sigtab<-with(tb_mod_plotdata2,table(Genus,Sig))
   tb_mod_plotdata_sigsubset<-subset(tb_mod_plotdata2,Genus %in% rownames(tb_mod_plotdata_sigtab)[tb_mod_plotdata_sigtab[,2]>0])
   
### Plot
  tb_plot1<-ggplot(tb_mod_plotdata_sigsubset,aes(x=Estimate,y=Genus)) + geom_errorbarh(aes(xmin=l95,xmax=u95,color=Sig),linewidth = 1.2,alpha=0.7) + geom_point(size=7,shape=21,color="gray40",aes(fill=Sig),alpha=0.7)
  tb_plot2<- tb_plot1 + theme_bw(base_size = 20) + geom_vline(xintercept=0,linetype="dashed") + scale_color_manual(values=c("gray77",sig_col)) + scale_fill_manual(values=c("white",sig_col)) + guides(fill="none",color="none") + facet_wrap(.~trait,scales = "free_x", nrow = 1) + 
    theme(
    axis.text = element_text(size = 22),
    axis.title = element_text(size = 26),
    strip.text = element_text(size = 28))
  tb_plot2

#ggsave('GLLVM plot.tiff', tb_plot2, width = 25, height = 20)
  
#### Correlations
  cr1<-data.frame(getResidualCor(fit_reduced_scaled2))#
  
  names(cr1)<-names(ys)
  names(cr1)<-abbreviate(names(cr1), minlength = 15)
  rownames(cr1)<-abbreviate(rownames(cr1), minlength = 15 )
  
  library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
  #devtools::install_github("kassambara/ggcorrplot")
  library(ggcorrplot)
## 
## Attaching package: 'ggcorrplot'
## The following object is masked from 'package:rstatix':
## 
##     cor_pmat
  #install.packages("ggpubr")
  library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
  cr2<-cor_pmat(cr1)
  
  corplot<-ggcorrplot(cr1, 
                      hc.order = TRUE,
                      outline.col = "white",
                      type = "full",
                      ggtheme = ggplot2::theme_minimal(base_size = 10),
                      tl.cex = 12,
                      p.mat = cr2,
                      sig.level = 0.05,
                      lab_size = 30,
                      #show.diag = F,
                      insig = "blank",
                      # colors = c("#6D9EC1", "white", "#E46726"))
                      colors = c("blue", "white", "red"))+
    theme(axis.text.x = element_text(angle = 90, vjust=0.5, size = 20), axis.text.y = element_text(size = 20),legend.text = element_text(size=20),legend.title = element_text(size=25),legend.key.size = unit(1, 'cm'))+
    theme(plot.margin=unit(c(0.2,0.2,0.2,2),"cm"))
  corplot

#ggsave('Correlation plot.tiff', corplot, width = 25, height = 20)
  
## Save Grid
  tb_mod_combined1<-ggarrange(tb_plot2,corplot,nrow=2,heights=c(0.8,1),widths=c(1,1),align="h",labels="AUTO",font.label=list(size=50))
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
  tb_mod_combined1

    cowplot::ggsave2('Tb Combined Model Outputs.png',tb_mod_combined1,width=20,height=25)
    cowplot::ggsave2('Tb Combined Model Outputs.pdf',tb_mod_combined1,width=20,height=25)

   # tb_mod_combined2<-ggarrange(tb_plot3,corplot,ncol=1,heights=c(0.8,1),widths=c(0.75,1),align="v",labels="AUTO",font.label=list(size=30))
  # tb_mod_combined2
  # cowplot::ggsave2('Tb Combined Model Outputs Column.pdf',tb_mod_combined2,width=12,height=20)
  #